#####################################################################
## NOTE:
# Computes data for analysis in Manuscript and Appendix
#####################################################################

# start from an empty global environment; every relative path below is resolved
# against the working directory set here, so edit the path before running
rm(list = ls())
# set working directory to YOUR PATH TO REPLICATION MATERIALS
setwd('~/PATH TO REPLICATION MATERIALS/ZG_replication_materials/')

#### *NOTE*: the script needs dummies version 1.5.6, taken from the CRAN archive URL below.
# Install it only when that exact version is not already installed, so that re-running the
# script does not download and rebuild the package every time.
# `packageVersion()` reads the installed DESCRIPTION without loading the package and
# raises an error when the package is missing (mapped to NULL here).
packageurl <- "https://cran.r-project.org/src/contrib/Archive/dummies/dummies_1.5.6.tar.gz"
dummies_version <- tryCatch(utils::packageVersion("dummies"), error = function(e) NULL)
if (is.null(dummies_version) || dummies_version != "1.5.6") {
  install.packages(packageurl, repos=NULL, type="source")
}

## load required packages
library(data.table)
library(stringr)
library(readxl)
library(haven)
library(dummies)

#####################
### Compute main data for analysis

#### Clean candidates' data and compute running variable for RDD
### 2010 General Election
# read the first sheet of the 2010 results workbook and convert it to a data.table by reference
results <- read_xlsx(
  './data/candidates/2010/British_Parliamentary_Constituency_General_Election_2010_Version_5.xlsx',
  sheet = 1
)
setDT(results)

# one constant party-label column per party, appended in this order;
# `melt` below collects them through the 'PParty' pattern
results[, c('ConPParty', 'LabPParty', 'LDPParty', 'SNPPParty',
            'PCPParty', 'GreenPParty', 'BNPPParty', 'UKIPPParty') :=
          list('Conservative', 'Labour', 'Liberal Democrats', 'SNP',
               'Plaid Cymru', 'Green Party', 'BNP', 'UKIP')]
# total votes cast: Turn10 (divided by 100) times Elec10, rounded
results[, total_votes := round((Turn10 / 100) * Elec10)]

# reshape to long format: one row per constituency and party
candidates2010 <- melt(
  results,
  id.vars = c("RefNo", "Seat", "PAname", "Region", "total_votes"),
  measure.vars = patterns('PParty', 'PPC', "vt10", "race10", "sex10"),
  value.name = c('party', 'name', "votes", "race", 'sex')
)

# compute running variable
# vote share (in %) of every candidate
candidates2010[, share := 100 * (votes / total_votes), by = list(RefNo, Seat)]
# highest share within each constituency-by-race group
candidates2010[!is.na(votes), share_first := max(share, na.rm = TRUE), by = list(RefNo, Seat, race)]
# record that share on the row of the strongest white / strongest BME candidate only
candidates2010[!is.na(votes) & share == share_first & race == 'White', share_strongest_white := share_first]
candidates2010[!is.na(votes) & share == share_first & race == 'BME', share_strongest_bme := share_first]
# copy both shares to every row of the constituency.
# In constituencies without a white (or without a BME) candidate all values are NA, so
# `max(..., na.rm = TRUE)` returns -Inf and emits a warning; the warning can be ignored
# because non-finite margins are filtered out below
candidates2010[!is.na(votes), share_strongest_white := max(share_strongest_white, na.rm = TRUE), by = list(RefNo, Seat)]
candidates2010[!is.na(votes), share_strongest_bme := max(share_strongest_bme, na.rm = TRUE), by = list(RefNo, Seat)]
# running variable `minority_victory_margin`: not finite for constituencies with all BME candidates, or all white candidates
candidates2010[!is.na(votes), minority_victory_margin := share_strongest_bme - share_strongest_white]

# split "Surname, Forename", strip leading whitespace of the forename, code gender dummy
candidates2010[, c('Surname', 'Forename') := tstrsplit(name, ",", type.convert = TRUE, fixed = TRUE)]
candidates2010[, Forename := trimws(Forename, which = "left")]
candidates2010[, female := ifelse(sex == 'Woman', 1, 0)]

# if minority victory margin < 0, keep info of the strongest white candidate;
# if minority victory margin > 0, keep info of the strongest BME candidate
candidates2010[minority_victory_margin < 0 & share == share_strongest_white,
               c('party_name', 'candidate_surname', 'candidate_forename', 'candidate_female') :=
                 list(party, Surname, Forename, female)]
candidates2010[minority_victory_margin > 0 & share == share_strongest_bme,
               c('party_name', 'candidate_surname', 'candidate_forename', 'candidate_female') :=
                 list(party, Surname, Forename, female)]


# subset to the retained candidates in constituencies with a finite running variable, include relevant columns
candidates2010 <- candidates2010[
  !is.na(party_name) & is.finite(minority_victory_margin),
  list(RefNo, Region, party_name, candidate_surname, candidate_forename, candidate_female,
       share, minority_victory_margin)
]

# add mapping between Constituency PA ID and ONS ID
ids <- fread(file = './data/candidates/2010/pa_ons_id.csv')

candidates2010 <- merge(
  candidates2010, ids,
  by.x = 'RefNo',
  by.y = 'Press Association ID Number',
  all.x = TRUE
)
# rename columns and add election year
setnames(
  candidates2010,
  old = c('RefNo', 'Region', 'Constituency ID', 'Constituency Name'),
  new = c('pa_id', 'region_name', 'ons_id', 'constituency_name')
)
candidates2010[, election := 2010]
rm(ids, results)

#### 2015 General Election
candidates2015 <- fread(file = './data/candidates/2015/Data-Table 1.csv')
# remove BNP vote, as candidate's characteristics only collected for 7 biggest parties
candidates2015[, BNPVote15 := NULL]
# move the vote columns to the front, in the same party order as the party-label columns added below
setcolorder(
  candidates2015,
  c('ConVote15', 'LabVote15', 'LDVote15', 'UKIPVote15', 'SNPVote15', 'PCVote15', 'GreenVote15')
)
# rename columns before melting data, so that each measure group shares one suffix
# ("name", "Sex", "Race"); after the rename `TotalVotes15` no longer matches the "Vote15" pattern
setnames(
  candidates2015,
  old = c('ONSConstID', 'ConPPC', 'ConPPCsex', 'ConPPCrace',
          'LabPCC', 'LabPPCsex', 'LabPPCrace',
          'LDPCC', 'LDPPCsex', 'LDPPCrace',
          'UKIPPPC', 'UKIPPPCsex', 'UKIPPPPCrace',
          'SNPPPC', 'SNPPPCsex', 'SNPPPCrace',
          'PCPPC', 'PCPPCsex', 'PCPPCrace',
          'GreenPPC', 'GreenPPCsex', 'GreenPPCrace', 'TotalVote15'),
  new = c('Constituency ID', 'Conname', 'ConSex', 'ConRace',
          'Labname', 'LabSex', 'LabRace',
          'LDname', 'LDSex', 'LDRace',
          'UKIPname', 'UKIPSex', 'UKIPRace',
          'SNPname', 'SNPSex', 'SNPRace',
          'PCname', 'PCSex', 'PCRace',
          'Greenname', 'GreenSex', 'GreenRace', 'TotalVotes15')
)

# one constant party-label column per party, appended in this order
candidates2015[, c('ConPParty', 'LabPParty', 'LDPParty', 'UKIPPParty',
                   'SNPPParty', 'PCPParty', 'GreenPParty') :=
                 list('Conservative', 'Labour', 'Liberal Democrats', 'UKIP',
                      'SNP', 'Plaid Cymru', 'Green Party')]

# reshape to long format: one row per constituency and party
candidates2015 <- melt(
  candidates2015,
  id.vars = c("Constituency ID", "ConstituencyName", "Region", 'TotalVotes15'),
  measure.vars = patterns("PParty", "name", "Race", "Sex", "Vote15"),
  value.name = c("party", "name", "race", 'sex', 'votes')
)
# split "Surname, Forename"; then strip leading whitespace, drop any parenthesised part
# and keep only the text before the first space of the forename
candidates2015[, c('Surname', 'Forename') := tstrsplit(name, ",", type.convert = TRUE, fixed = TRUE)]
candidates2015[, Forename := trimws(Forename, which = "left")]
candidates2015[, Forename := gsub("\\s*\\([^\\)]+\\)", "", as.character(Forename))]
candidates2015[, Forename := sub(" .*", "", as.character(Forename))]

# compute running variable
candidates2015[, share := (votes / TotalVotes15) * 100]
# highest share within each constituency-by-race group
candidates2015[, share_first := max(share, na.rm = TRUE), by = list(`Constituency ID`, race)]
# record that share on the row of the strongest white / strongest BME candidate only
candidates2015[share == share_first & race == 'White', share_strongest_white := share_first]
candidates2015[share == share_first & race == 'BME', share_strongest_bme := share_first]
# copy both shares to every row of the constituency.
# Where a constituency has no white (or no BME) candidate all values are NA, so
# `max(..., na.rm = TRUE)` returns -Inf with a warning; ignore the warning
candidates2015[, share_strongest_white := max(share_strongest_white, na.rm = TRUE), by = list(`Constituency ID`)]
candidates2015[, share_strongest_bme := max(share_strongest_bme, na.rm = TRUE), by = list(`Constituency ID`)]
# running variable `minority_victory_margin`: not finite for constituencies with all BME candidates, or all white candidates
candidates2015[, minority_victory_margin := share_strongest_bme - share_strongest_white]
candidates2015[, female := ifelse(sex == 'Woman', 1, 0)]

# if minority victory margin < 0, keep info of the strongest white candidate;
# if minority victory margin > 0, keep info of the strongest BME candidate
candidates2015[minority_victory_margin < 0 & share == share_strongest_white,
               c('party_name', 'candidate_surname', 'candidate_forename', 'candidate_female') :=
                 list(party, Surname, Forename, female)]
candidates2015[minority_victory_margin > 0 & share == share_strongest_bme,
               c('party_name', 'candidate_surname', 'candidate_forename', 'candidate_female') :=
                 list(party, Surname, Forename, female)]

# subset to the retained candidates in constituencies with a finite running variable, include relevant columns
candidates2015 <- candidates2015[
  !is.na(party_name) & is.finite(minority_victory_margin),
  list(`Constituency ID`, `ConstituencyName`,
       Region, party_name, candidate_surname, candidate_forename, candidate_female,
       share, minority_victory_margin)
]
# rename columns and add election year
setnames(
  candidates2015,
  old = c('Region', 'Constituency ID', 'ConstituencyName'),
  new = c('region_name', 'ons_id', 'constituency_name')
)
candidates2015[, election := 2015]

#### 2017 General election
## note that we use a subset of variables from the 2017 Parliamentary Candidates UK Project data and complement it with Electoral Commission data
candidates2017 <- fread(file = './data/candidates/2017/Parliamentary Candidates UK Project Subset.csv')
# separate candidates' forename, surname; then strip leading whitespace, drop any
# parenthesised part and keep only the text before the first space of the forename
candidates2017[, c('Surname', 'Forename') := tstrsplit(name, ",", type.convert = TRUE, fixed = TRUE)]
candidates2017[, Forename := trimws(Forename, which = "left")]
candidates2017[, Forename := gsub("\\s*\\([^\\)]+\\)", "", as.character(Forename))]
candidates2017[, Forename := sub(" .*", "", as.character(Forename))]

# complement 2017 Parliamentary Candidates UK Project data with election results data from Electoral Commission
# as 2017 Parliamentary Candidates UK Project has missing data from Northern Ireland (NOTE: Northern Ireland is not included in analysis)
results2017 <- fread('./data/candidates/2017/2017 UKPGE electoral data 4.csv', skip = 1, encoding = 'Latin-1')
results2017[, Surname := tolower(Surname)]
# candidates without a vote count are matched to the official results on constituency name and lower-cased surname
to_fix <- candidates2017[is.na(Numberofvotesobtainedbycandidate)]
to_fix[, Surname := tolower(Surname)]
to_fix <- merge(
  to_fix, results2017,
  by.x = c('const_name', 'Surname'),
  by.y = c('Constituency', 'Surname'),
  all.x = TRUE
)
# manually fix non-matching cases from Northern Ireland: (candidate name, ONS code, valid votes)
ni_fixes <- list(
  list('Little-Pengelly, Emma', 'N06000003', 13299),
  list('McDonnell, Alasdair', 'N06000003', 11303),
  list("O'Muilleoir, Mairtin", 'N06000003', 7143),
  list("Atwood, Tim", 'N06000004', 2860),
  list("Stewart, John", 'N06000005', 4524),
  list("Campbell, Noreen", 'N06000007', 886),
  list("Elliott, Tom", 'N06000007', 24355),
  list("Garrity, Mary", 'N06000007', 2587),
  list("Gildernew, Michelle", 'N06000007', 25230),
  list("Jones, Tanya", 'N06000007', 423),
  list("McCallion, Elisha", 'N06000008', 18256),
  list("Nickels, Ian", 'N06000009', 462),
  list("Brady, Mickey", 'N06000011', 25666),
  list("Coade, Jackie", 'N06000011', 1256),
  list("Irwin, William", 'N06000011', 13177),
  list("McNulty, Justin", 'N06000011', 9055),
  list("Nicholson, Sam", 'N06000011', 4425),
  list("McCartney, Therese", 'N06000013', 531)
)
for (ni_fix in ni_fixes) {
  to_fix[name == ni_fix[[1]], c('ONS Code', 'Valid votes') := list(ni_fix[[2]], ni_fix[[3]])]
}

# candidates with a vote count keep their own values, under the Electoral Commission column names
candidates2017 <- candidates2017[!is.na(Numberofvotesobtainedbycandidate)]
setnames(
  candidates2017,
  old = c('Numberofvotesobtainedbycandidate', 'ONSConstID', 'pano'),
  new = c('Valid votes', 'ONS Code', 'PANO')
)

# drop columns that only exist in the repaired rows, then append them
rmVars <- setdiff(colnames(to_fix), colnames(candidates2017))
to_fix[, (rmVars) := NULL]
candidates2017 <- rbind(candidates2017, to_fix)
# manually fix 16 cases with inaccurate ONS Code/PANO and 4 cases with inaccurate 'valid votes'
# (`to_fix` lists the rows that find no match in the official results; it is not used further)
to_fix <- merge(candidates2017, results2017, by = c('ONS Code', 'Valid votes'), all.x = TRUE)
to_fix <- to_fix[is.na(`Party Identifer`)]

# (recorded ONS code, candidate name, corrected ONS code, corrected PANO), applied in this order
code_fixes <- list(
  list('E14000602', "Langley, John", 'E14000601', 101),
  list('E14000641', "Martin, Craig", 'E14000840', 215),
  list('E14000656', "Underwood, Peter", "E14000655", 171),
  list('E14000759', "Warrington, Rosemary", "E14000760", 329),
  list('E14000781', "Coyle, Laura", "E14000742", 305),
  list('E14000880', "Reed, Graham Richard", "E14000879", 452),
  list('E14000935', "Graves, Alan Wayne", "E14000663", 182),
  list('E14000971', "Robson, Emma", "E14000970", 535),
  list('E14000971', "Strike, Ted", "E14000970", 535),
  list('E14001029', "Clucas, Flo", "E14001030", 613),
  list('S14000002', "Durkin, Richard John", "S14000001", 3),
  list('S14000024', "Briggs, Miles Edward Franc", "S14000025", 231),
  list('S14000025', "Smith, Steph", "S14000024", 230),
  list('S14000035', "Nelson, Isabel", "S14000029", 262),
  list('S14000037', "Jardine, Christine", "S14000026", 232),
  list('S14000043', "Batho, Sandy", "S14000026", 232)
)
for (code_fix in code_fixes) {
  candidates2017[`ONS Code` == code_fix[[1]] & name == code_fix[[2]],
                 c('ONS Code', 'PANO') := list(code_fix[[3]], code_fix[[4]])]
}
# (ONS code, candidate name, corrected valid votes); these rows are not touched by the code fixes above
vote_fixes <- list(
  list('E14000761', "Gummer, Ben", 23393),
  list('E14000761', "Martin, Sandy", 24224),
  list('E14000822', "O'Neill, Hannah", 28987),
  list('S14000030', "Clark, Matthew", 576)
)
for (vote_fix in vote_fixes) {
  candidates2017[`ONS Code` == vote_fix[[1]] & name == vote_fix[[2]], `Valid votes` := vote_fix[[3]]]
}

# compute running variable
# attach the official results, matching on constituency code and vote count
candidates2017 <- merge(candidates2017, results2017, by = c('ONS Code', 'Valid votes'), all.x = TRUE)
candidates2017[, total_votes := sum(`Valid votes`), by = `ONS Code`]
candidates2017[, share := (`Valid votes` / total_votes) * 100]
# highest share within each constituency-by-ethnicity group
candidates2017[, share_first := max(share, na.rm = TRUE), by = list(`ONS Code`, bme)]
# record that share on the row of the strongest white / strongest BME candidate only
candidates2017[share == share_first & bme == 0, share_strongest_white := share_first]
candidates2017[share == share_first & bme == 1, share_strongest_bme := share_first]
# copy both shares to every row of the constituency.
# Where a constituency has no white (or no BME) candidate all values are NA, so
# `max(..., na.rm = TRUE)` returns -Inf with a warning; ignore the warning
candidates2017[, share_strongest_white := max(share_strongest_white, na.rm = TRUE), by = list(`ONS Code`)]
candidates2017[, share_strongest_bme := max(share_strongest_bme, na.rm = TRUE), by = list(`ONS Code`)]
# running variable `minority_victory_margin`: not finite for constituencies with all BME candidates, or all white candidates
candidates2017[, minority_victory_margin := share_strongest_bme - share_strongest_white]
candidates2017[, female := ifelse(sex == 1, 1, 0)]

# if minority victory margin < 0, keep info of the strongest white candidate;
# if minority victory margin > 0, keep info of the strongest BME candidate
# (`Surname.x` is the surname column coming from the candidates data in the merge above)
candidates2017[minority_victory_margin < 0 & share == share_strongest_white,
               c('party_name', 'candidate_surname', 'candidate_forename', 'candidate_female') :=
                 list(`Party Identifer`, Surname.x, Forename, female)]
candidates2017[minority_victory_margin > 0 & share == share_strongest_bme,
               c('party_name', 'candidate_surname', 'candidate_forename', 'candidate_female') :=
                 list(`Party Identifer`, Surname.x, Forename, female)]

# subset to the retained candidates in constituencies with a finite running variable, include relevant columns
candidates2017 <- candidates2017[
  !is.na(party_name) & is.finite(minority_victory_margin),
  list(`ONS Code`, Constituency,
       region, party_name, candidate_surname, candidate_forename, candidate_female,
       incumbent,
       share, minority_victory_margin)
]
# rename variables and add election year
setnames(
  candidates2017,
  old = c('region', 'ONS Code', 'Constituency'),
  new = c('region_name', 'ons_id', 'constituency_name')
)
candidates2017[, election := 2017]
rm(to_fix, results2017, ni_fixes, ni_fix, code_fixes, code_fix, vote_fixes, vote_fix)

## append three years of candidates data
# columns are matched by name; columns missing in a given year are filled with NA
candidates <- rbind(
  candidates2010, candidates2015, candidates2017,
  use.names = TRUE, fill = TRUE
)

## get incumbent data for three election years
# the `incumbent` column carried over from the 2017 data is replaced by the one in the coded incumbency file
incumbent_data <- fread('./data/candidates/candidates_coded_incumbency.csv')
candidates[, incumbent := NULL]
candidates <- merge(
  candidates,
  incumbent_data[, list(election, ons_id, incumbent)],
  by = c('election', 'ons_id'),
  all.x = TRUE
)

# clean party names and compute party dummies
candidates[, pa_id := NULL]
candidates[party_name == "Scottish National Party", party_name := "SNP"]
candidates[party_name == "Green", party_name := "Green Party"]
candidates[party_name == "Liberal Democrats", party_name := "Liberal Democrat"]
# warning from `dummy` function can be ignored, data has been manually checked
candidates <- cbind(
  candidates,
  dummy(candidates$party_name, sep = '_', drop = FALSE)
)

rm(candidates2010, candidates2015, candidates2017, incumbent_data)

#### Merge candidates data with constituency turnout and electorate size in the next election (election t+1)
## get turnout data for England, Scotland and Wales
elections <- fread('./data/clean/ge_2015_2017_2019_ESW.csv')

#### Merge candidates with turnout
# map every election year t to the following general election t+1
candidates[election == 2010, next_election_year := 2015][
  election == 2015, next_election_year := 2017][
  election == 2017, next_election_year := 2019]

# left join: keep every candidate row, add electorate and turnout of the next election
candidates <- merge(
  candidates,
  elections[, list(election, id, electorate, turnout)],
  by.x = c('next_election_year', 'ons_id'),
  by.y = c("election", 'id'),
  all.x = TRUE
)



##### Compute vote share of incumbent/strongest opponent party, margin of victory, effective number of parties in next election
## compute vote share and margin of victory of every party in election t+1
## (the 2015 and 2017 data are re-read and reshaped with the same steps as in the candidates section above)
### 2015 General Election
candidates2015 <- fread(file = './data/candidates/2015/Data-Table 1.csv')
# remove BNP vote, as candidate's characteristics only collected for 7 biggest parties
candidates2015[, BNPVote15:=NULL]
# move the vote columns to the front, in the same party order as the party-label columns added below
setcolorder(candidates2015, c('ConVote15', 'LabVote15', 'LDVote15', 'UKIPVote15', 'SNPVote15', 'PCVote15', 'GreenVote15'))
# rename columns before melting data
setnames(candidates2015,c('ONSConstID', 'ConPPC', 'ConPPCsex', 'ConPPCrace',
                          'LabPCC', 'LabPPCsex', 'LabPPCrace',
                          'LDPCC', 'LDPPCsex', 'LDPPCrace',
                          'UKIPPPC', 'UKIPPPCsex', 'UKIPPPPCrace',
                          'SNPPPC', 'SNPPPCsex', 'SNPPPCrace',
                          'PCPPC', 'PCPPCsex', 'PCPPCrace',
                          'GreenPPC', 'GreenPPCsex', 'GreenPPCrace', 'TotalVote15'),
         c('Constituency ID', 'Conname', 'ConSex', 'ConRace',
           'Labname', 'LabSex', 'LabRace',
           'LDname','LDSex', 'LDRace',
           'UKIPname','UKIPSex', 'UKIPRace',
           'SNPname','SNPSex', 'SNPRace',
           'PCname','PCSex', 'PCRace',
           'Greenname','GreenSex', 'GreenRace', 'TotalVotes15'))

candidates2015[, ConPParty:='Conservative']
candidates2015[, LabPParty:='Labour']
candidates2015[, LDPParty:='Liberal Democrats']
candidates2015[, UKIPPParty:='UKIP']
candidates2015[, SNPPParty:='SNP']
candidates2015[, PCPParty:='Plaid Cymru']
candidates2015[, GreenPParty:='Green Party']

# reshape to long format: one row per constituency and party
candidates2015 <- melt(candidates2015, id.vars = c("Constituency ID", "ConstituencyName", "Region", 'TotalVotes15'), measure.vars = patterns("PParty", "name", "Race", "Sex", "Vote15"), value.name = c("party", "name", "race", 'sex', 'votes'))
# the surname/forename cleaning done in the candidates section is omitted here:
# only share, victory margin, party and constituency id are kept below
candidates2015 <- candidates2015[!is.na(votes)]
candidates2015[,share:=(votes/TotalVotes15)*100]
candidates2015[,share_first:=max(share, na.rm = TRUE), by=.(`Constituency ID`)]
# second-largest share in the constituency; `sort()` drops NAs, and the result is NA
# (instead of an error) when fewer than two shares are available
candidates2015[,share_second:=sort(share, decreasing = TRUE)[2L], by=`Constituency ID`]
# winner: lead over the runner-up (positive); everyone else: gap to the winner (negative)
candidates2015[share==share_first,victory_margin:=share-share_second]
candidates2015[share!=share_first,victory_margin:=share-share_first]
candidates2015[, election:=2015]

results2015 <- candidates2015[,.(share,victory_margin,party,`Constituency ID`,election)]
setnames(results2015, 'Constituency ID', 'id')

### 2017
candidates2017 <- fread(file = './data/candidates/2017/Parliamentary Candidates UK Project Subset.csv')
# separate candidates' forename, surname (the surname is needed for the merge below;
# the forename cleaning done in the candidates section is omitted because forenames are not used here)
candidates2017[,c('Surname', 'Forename'):= tstrsplit(name, ",", type.convert = TRUE, fixed = TRUE)]

# complement 2017 Parliamentary Candidates UK Project data with election results data from Electoral Commission
# as 2017 Parliamentary Candidates UK Project has missing data from Northern Ireland (NOTE: Northern Ireland is not included in analysis)
results2017 <- fread('./data/candidates/2017/2017 UKPGE electoral data 4.csv', skip = 1, encoding = 'Latin-1')
results2017[,Surname:=tolower(Surname)]
# candidates without a vote count are matched to the official results on constituency name and lower-cased surname
to_fix <- candidates2017[is.na(Numberofvotesobtainedbycandidate)]
to_fix[,Surname:=tolower(Surname)]
to_fix <- merge(to_fix,results2017,by.x = c('const_name','Surname'), by.y = c('Constituency','Surname'), all.x = TRUE)
# manually fix non-matching cases from Northern Ireland: (candidate name, ONS code, valid votes)
ni_fixes <- list(
  list('Little-Pengelly, Emma', 'N06000003', 13299),
  list('McDonnell, Alasdair', 'N06000003', 11303),
  list("O'Muilleoir, Mairtin", 'N06000003', 7143),
  list("Atwood, Tim", 'N06000004', 2860),
  list("Stewart, John", 'N06000005', 4524),
  list("Campbell, Noreen", 'N06000007', 886),
  list("Elliott, Tom", 'N06000007', 24355),
  list("Garrity, Mary", 'N06000007', 2587),
  list("Gildernew, Michelle", 'N06000007', 25230),
  list("Jones, Tanya", 'N06000007', 423),
  list("McCallion, Elisha", 'N06000008', 18256),
  list("Nickels, Ian", 'N06000009', 462),
  list("Brady, Mickey", 'N06000011', 25666),
  list("Coade, Jackie", 'N06000011', 1256),
  list("Irwin, William", 'N06000011', 13177),
  list("McNulty, Justin", 'N06000011', 9055),
  list("Nicholson, Sam", 'N06000011', 4425),
  list("McCartney, Therese", 'N06000013', 531)
)
for (ni_fix in ni_fixes) {
  to_fix[name==ni_fix[[1]], c('ONS Code', 'Valid votes'):=.(ni_fix[[2]], ni_fix[[3]])]
}

# candidates with a vote count keep their own values, under the Electoral Commission column names
candidates2017 <- candidates2017[!is.na(Numberofvotesobtainedbycandidate)]
setnames(candidates2017, c('Numberofvotesobtainedbycandidate', 'ONSConstID', 'pano'), c('Valid votes', 'ONS Code', 'PANO'))

# drop columns that only exist in the repaired rows, then append them
rmVars <- setdiff(colnames(to_fix), colnames(candidates2017))
to_fix[, (rmVars):=NULL]
candidates2017 <- rbind(candidates2017, to_fix)
# manually fix 16 cases with inaccurate ONS Code/PANO and 4 cases with inaccurate 'valid votes'
# (`to_fix` lists the rows that find no match in the official results; it is not used further)
to_fix <- merge(candidates2017, results2017, by=c('ONS Code', 'Valid votes'), all.x = TRUE)
to_fix <- to_fix[is.na(`Party Identifer`)]

# (recorded ONS code, candidate name, corrected ONS code, corrected PANO), applied in this order
code_fixes <- list(
  list('E14000602', "Langley, John", 'E14000601', 101),
  list('E14000641', "Martin, Craig", 'E14000840', 215),
  list('E14000656', "Underwood, Peter", "E14000655", 171),
  list('E14000759', "Warrington, Rosemary", "E14000760", 329),
  list('E14000781', "Coyle, Laura", "E14000742", 305),
  list('E14000880', "Reed, Graham Richard", "E14000879", 452),
  list('E14000935', "Graves, Alan Wayne", "E14000663", 182),
  list('E14000971', "Robson, Emma", "E14000970", 535),
  list('E14000971', "Strike, Ted", "E14000970", 535),
  list('E14001029', "Clucas, Flo", "E14001030", 613),
  list('S14000002', "Durkin, Richard John", "S14000001", 3),
  list('S14000024', "Briggs, Miles Edward Franc", "S14000025", 231),
  list('S14000025', "Smith, Steph", "S14000024", 230),
  list('S14000035', "Nelson, Isabel", "S14000029", 262),
  list('S14000037', "Jardine, Christine", "S14000026", 232),
  list('S14000043', "Batho, Sandy", "S14000026", 232)
)
for (code_fix in code_fixes) {
  candidates2017[`ONS Code`==code_fix[[1]] & name==code_fix[[2]],
                 c('ONS Code', 'PANO'):=.(code_fix[[3]], code_fix[[4]])]
}
# (ONS code, candidate name, corrected valid votes); these rows are not touched by the code fixes above
vote_fixes <- list(
  list('E14000761', "Gummer, Ben", 23393),
  list('E14000761', "Martin, Sandy", 24224),
  list('E14000822', "O'Neill, Hannah", 28987),
  list('S14000030', "Clark, Matthew", 576)
)
for (vote_fix in vote_fixes) {
  candidates2017[`ONS Code`==vote_fix[[1]] & name==vote_fix[[2]], `Valid votes`:=vote_fix[[3]]]
}
rm(ni_fixes, ni_fix, code_fixes, code_fix, vote_fixes, vote_fix)

# attach the official results, matching on constituency code and vote count
candidates2017 <- merge(candidates2017, results2017, by=c('ONS Code', 'Valid votes'), all.x = TRUE)
candidates2017[,total_votes:=sum(`Valid votes`), by=`ONS Code`]
candidates2017[,share:=(`Valid votes`/total_votes)*100]
candidates2017[,share_first:=max(share, na.rm = TRUE), by=.(`ONS Code`)]
# second-largest share in the constituency; `sort()` drops NAs, and the result is NA
# (instead of an error) when fewer than two shares are available
candidates2017[,share_second:=sort(share, decreasing = TRUE)[2L], by=`ONS Code`]
# winner: lead over the runner-up (positive); everyone else: gap to the winner (negative)
candidates2017[share==share_first,victory_margin:=share-share_second]
candidates2017[share!=share_first,victory_margin:=share-share_first]
candidates2017[, election:=2017]

results2017 <- candidates2017[,.(share,victory_margin,`Party Identifer`,`ONS Code`,election)]
setnames(results2017, c('ONS Code', 'Party Identifer'), c('id', 'party'))

### 2019
candidates2019 <- fread('./data/candidates/2019/HoC-GE2019-results-by-candidate-csv.csv')
# rescale `share` by 100 (assumes the file stores shares as proportions -- TODO confirm)
candidates2019[,share:=share*100]
candidates2019[,share_first:=max(share, na.rm = TRUE), by=.(ons_id)]
# second-largest share in the constituency; `sort()` drops NAs, and the result is NA
# (instead of an error) when fewer than two shares are available
candidates2019[,share_second:=sort(share, decreasing = TRUE)[2L], by=ons_id]
# winner: lead over the runner-up (positive); everyone else: gap to the winner (negative)
candidates2019[share==share_first,victory_margin:=share-share_second]
candidates2019[share!=share_first,victory_margin:=share-share_first]
candidates2019[, election:=2019]

# keep the columns shared with the 2015 and 2017 results, under the same names
results2019 <- candidates2019[,.(share,victory_margin,party_name,ons_id,election)]
setnames(results2019, c('ons_id', 'party_name'), c('id', 'party'))

# stack the party-level results of the 2015, 2017 and 2019 elections
results <- rbind(results2015, results2017, results2019)

# harmonise party labels with the ones used in `candidates`
party_recodes <- c(
  "Green" = "Green Party",
  "Liberal Democrats" = "Liberal Democrat",
  "Scottish National Party" = "SNP",
  "Labour and Co-operative" = "Labour"
)
for (old_label in names(party_recodes)) {
  results[party == old_label, party := party_recodes[[old_label]]]
}
# every party that does not occur in `candidates` is pooled into 'other'
known_parties <- unique(candidates$party_name)
results[!(party %in% known_parties), party := 'other']

setnames(
  results,
  old = c('share', 'victory_margin'),
  new = c('share_t1', 'victory_margin_t1')
)

## Merge with candidates data
# `share` of the candidate at election t becomes `share_t0`; the party's outcome at t+1 is joined on
setnames(candidates, 'share', 'share_t0')
candidates <- merge(
  candidates, results,
  by.x = c('next_election_year', 'ons_id', 'party_name'),
  by.y = c("election", 'id', 'party'),
  all.x = TRUE
)

# compute variable for party win/lose election in t+1
candidates[, victory_t1 := ifelse(victory_margin_t1 > 0, 1, 0)]

rm(results2015, results2017, results2019, to_fix, elections,
   candidates2015, candidates2017, candidates2019,
   party_recodes, old_label, known_parties)

### Compute vote share at election t+1 of strongest non-coethnic opposing party at election t
## compute vote share at t+1
# 2015
candidates2015 <- fread(file = './data/candidates/2015/Data-Table 1.csv')
# remove BNP vote, as in the candidates section above
candidates2015[, BNPVote15:=NULL]
# move the vote columns to the front, in the same party order as the party-label columns added below
setcolorder(candidates2015, c('ConVote15', 'LabVote15', 'LDVote15', 'UKIPVote15', 'SNPVote15', 'PCVote15', 'GreenVote15'))
# rename columns before melting data
setnames(candidates2015,c('ONSConstID', 'ConPPC', 'ConPPCsex', 'ConPPCrace',
                          'LabPCC', 'LabPPCsex', 'LabPPCrace',
                          'LDPCC', 'LDPPCsex', 'LDPPCrace',
                          'UKIPPPC', 'UKIPPPCsex', 'UKIPPPPCrace',
                          'SNPPPC', 'SNPPPCsex', 'SNPPPCrace',
                          'PCPPC', 'PCPPCsex', 'PCPPCrace',
                          'GreenPPC', 'GreenPPCsex', 'GreenPPCrace', 'TotalVote15'),
         c('Constituency ID', 'Conname', 'ConSex', 'ConRace',
           'Labname', 'LabSex', 'LabRace',
           'LDname','LDSex', 'LDRace',
           'UKIPname','UKIPSex', 'UKIPRace',
           'SNPname','SNPSex', 'SNPRace',
           'PCname','PCSex', 'PCRace',
           'Greenname','GreenSex', 'GreenRace', 'TotalVotes15'))

candidates2015[, ConPParty:='Conservative']
candidates2015[, LabPParty:='Labour']
candidates2015[, LDPParty:='Liberal Democrats']
candidates2015[, UKIPPParty:='UKIP']
candidates2015[, SNPPParty:='SNP']
candidates2015[, PCPParty:='Plaid Cymru']
candidates2015[, GreenPParty:='Green Party']

# reshape to long format: one row per constituency and party
candidates2015 <- melt(candidates2015, id.vars = c("Constituency ID", "ConstituencyName", "Region", 'TotalVotes15'), measure.vars = patterns("PParty", "name", "Race", "Sex", "Vote15"), value.name = c("party", "name", "race", 'sex', 'votes'))
# the surname/forename cleaning done in the candidates section is omitted here:
# only constituency id, party and vote share are kept below
candidates2015[,share:=(votes/TotalVotes15)*100]

candidates2015 <- candidates2015[,.(`Constituency ID`, party, share)]
setnames(candidates2015, 'Constituency ID', 'id')
candidates2015[,election:=2015]
# 2017
# vote shares are computed directly from the Electoral Commission results file
candidates2017 <- fread(
  './data/candidates/2017/2017 UKPGE electoral data 4.csv',
  skip = 1, encoding = 'Latin-1'
)
candidates2017[, total_votes := sum(`Valid votes`), by = `ONS Code`]
candidates2017[, share := (`Valid votes` / total_votes) * 100]
# keep constituency id, party and share under the common column names
candidates2017 <- candidates2017[, list(id = `ONS Code`, party = `Party Identifer`, share)]
candidates2017[, election := 2017]
# 2019
candidates2019 <- fread('./data/candidates/2019/HoC-GE2019-results-by-candidate-csv.csv')
# rescale `share` by 100, as done for the 2019 results elsewhere in this script
candidates2019[, share := share * 100]
# keep constituency id, party and share under the common column names
candidates2019 <- candidates2019[, list(id = ons_id, party = party_name, share)]
candidates2019[, election := 2019]

# stack the party vote shares of the three elections, with identical columns in identical order
vote_share_t_1 <- rbind(
  candidates2015[, list(id, party, share, election)],
  candidates2017[, list(id, party, share, election)],
  candidates2019[, list(id, party, share, election)]
)
# harmonise party labels (long official names -> short labels)
party_short_labels <- c(
  "Scottish National Party" = "SNP",
  "Green" = "Green Party",
  "Liberal Democrats" = "Liberal Democrat",
  "UK Independence Party" = "UKIP",
  "Christian Peoples Alliance Party" = "CPA",
  "Women's Equality Party" = "WEP",
  "Alliance for Green Socialism" = "AGS",
  "Social Democratic Party" = "SDP"
)
for (long_label in names(party_short_labels)) {
  vote_share_t_1[party == long_label, party := party_short_labels[[long_label]]]
}

rm(candidates2015, candidates2017, candidates2019, party_short_labels, long_label)

## compute strongest non-coethnic opponent at t
# 2010 General Election
results <- read_xlsx('./data/candidates/2010/British_Parliamentary_Constituency_General_Election_2010_Version_5.xlsx', sheet=1)
setDT(results)
# one constant party-label column per party, so that melt() can stack the
# labels alongside the per-party candidate columns
party_labels_2010 <- list(ConPParty = 'Conservative',
                          LabPParty = 'Labour',
                          LDPParty = 'Liberal Democrats',
                          SNPPParty = 'SNP',
                          PCPParty = 'Plaid Cymru',
                          GreenPParty = 'Green Party',
                          BNPPParty = 'BNP',
                          UKIPPParty = 'UKIP')
results[, (names(party_labels_2010)) := party_labels_2010]
# total votes cast = turnout (percent) x electorate
results[, total_votes := round((Turn10/100)*Elec10)]

# long format: one row per constituency-party pair
candidates2010 <- melt(results,
                       id.vars = c("RefNo", "Seat", "PAname", "Region", "total_votes"),
                       measure.vars = patterns('PParty', 'PPC', "vt10", "race10", "sex10"),
                       value.name = c('party', 'name', "votes", "race", 'sex'))
candidates2010[, share := 100*(votes/total_votes), by = .(RefNo, Seat)]
# highest share among white and among BME candidates in each constituency,
# using only candidates with recorded votes (-Inf, with a warning from max(),
# when a constituency has no candidate of that group)
candidates2010[!is.na(votes),
               c('share_strongest_white', 'share_strongest_bme') :=
                 .(max(share[which(race == 'White')], na.rm = TRUE),
                   max(share[which(race == 'BME')], na.rm = TRUE)),
               by = .(RefNo, Seat)]
candidates2010[!is.na(votes), minority_victory_margin := share_strongest_bme - share_strongest_white]
# opposition party = party of the strongest candidate of the group that trails:
# BME candidate when the margin is negative, white candidate when it is positive
candidates2010[!is.na(votes) &
                 ((minority_victory_margin < 0 & share == share_strongest_bme) |
                    (minority_victory_margin > 0 & share == share_strongest_white)),
               party_opposition := party]

# keep constituencies with a defined (finite) margin and an identified opponent
candidates2010 <- candidates2010[!is.na(party_opposition) & is.finite(minority_victory_margin),
                                 .(RefNo, party_opposition)]

# add mapping between Constituency PA ID and ONS ID
ids <- fread(file = './data/candidates/2010/pa_ons_id.csv')
candidates2010 <- merge(candidates2010, ids,
                        by.x = 'RefNo',
                        by.y = 'Press Association ID Number',
                        all.x = TRUE)
# rename columns and add election year
setnames(candidates2010,
         old = c('RefNo', 'Constituency ID', 'Constituency Name'),
         new = c('pa_id', 'ons_id', 'constituency_name'))
candidates2010[, election := 2010]
rm(party_labels_2010)

# 2015 GE
candidates2015 <- fread(file = './data/candidates/2015/Data-Table 1.csv')
candidates2015[, BNPVote15 := NULL]
# move the per-party vote columns to the front in the same party order as the
# party-label columns added below, so melt() stacks them in matching order
setcolorder(candidates2015, c('ConVote15', 'LabVote15', 'LDVote15', 'UKIPVote15', 'SNPVote15', 'PCVote15', 'GreenVote15'))
# old column name -> new column name
rename_2015 <- c(
  ONSConstID = 'Constituency ID',
  ConPPC = 'Conname', ConPPCsex = 'ConSex', ConPPCrace = 'ConRace',
  LabPCC = 'Labname', LabPPCsex = 'LabSex', LabPPCrace = 'LabRace',
  LDPCC = 'LDname', LDPPCsex = 'LDSex', LDPPCrace = 'LDRace',
  UKIPPPC = 'UKIPname', UKIPPPCsex = 'UKIPSex', UKIPPPPCrace = 'UKIPRace',
  SNPPPC = 'SNPname', SNPPPCsex = 'SNPSex', SNPPPCrace = 'SNPRace',
  PCPPC = 'PCname', PCPPCsex = 'PCSex', PCPPCrace = 'PCRace',
  GreenPPC = 'Greenname', GreenPPCsex = 'GreenSex', GreenPPCrace = 'GreenRace',
  TotalVote15 = 'TotalVotes15'
)
setnames(candidates2015, old = names(rename_2015), new = unname(rename_2015))

# one constant party-label column per party
party_labels_2015 <- list(ConPParty = 'Conservative',
                          LabPParty = 'Labour',
                          LDPParty = 'Liberal Democrats',
                          UKIPPParty = 'UKIP',
                          SNPPParty = 'SNP',
                          PCPParty = 'Plaid Cymru',
                          GreenPParty = 'Green Party')
candidates2015[, (names(party_labels_2015)) := party_labels_2015]

# long format: one row per constituency-party pair
candidates2015 <- melt(candidates2015,
                       id.vars = c("Constituency ID", "ConstituencyName", "Region", 'TotalVotes15'),
                       measure.vars = patterns("PParty", "name", "Race", "Sex", "Vote15"),
                       value.name = c("party", "name", "race", 'sex', 'votes'))
# split "Surname, Forename"; keep only the first forename token
candidates2015[, c('Surname', 'Forename') := tstrsplit(name, ",", type.convert = TRUE, fixed = TRUE)]
candidates2015[, Forename := trimws(Forename, "l")]
candidates2015[, Forename := sub(" .*", "", gsub("\\s*\\([^\\)]+\\)", "", as.character(Forename)))]
candidates2015[, share := 100 * (votes / TotalVotes15)]
# highest share among white and among BME candidates in each constituency
# (-Inf, with a warning from max(), when a group has no candidate with a share)
candidates2015[, c('share_strongest_white', 'share_strongest_bme') :=
                 .(max(share[which(race == 'White')], na.rm = TRUE),
                   max(share[which(race == 'BME')], na.rm = TRUE)),
               by = .(`Constituency ID`)]
candidates2015[, minority_victory_margin := share_strongest_bme - share_strongest_white]

# opposition party = party of the strongest candidate of the trailing group
candidates2015[!is.na(votes) &
                 ((minority_victory_margin < 0 & share == share_strongest_bme) |
                    (minority_victory_margin > 0 & share == share_strongest_white)),
               party_opposition := party]

# keep constituencies with a defined (finite) margin and an identified opponent
candidates2015 <- candidates2015[!is.na(party_opposition) & is.finite(minority_victory_margin),
                                 .(ons_id = `Constituency ID`, party_opposition)]
candidates2015[, election := 2015]
rm(rename_2015, party_labels_2015)

### 2017 GE
# Load the 2017 candidate subset, recover vote counts that are missing in it
# from the 2017 results file, and patch records whose constituency code,
# PANO or vote count does not match the results file.
candidates2017 <- fread(file = './data/candidates/2017/Parliamentary Candidates UK Project Subset.csv')
# separate candidates' forename, surname
# (`name` is split on the comma; Forename is then left-trimmed, stripped of
#  parenthesised parts and cut at the first space)
candidates2017[,c('Surname', 'Forename'):= tstrsplit(name, ",", type.convert = TRUE, fixed = TRUE)]
candidates2017[,Forename:=trimws(Forename, "l")]
candidates2017[,Forename:=gsub("\\s*\\([^\\)]+\\)","",as.character(candidates2017$Forename))]
candidates2017[,Forename:=sub(" .*", "", as.character(candidates2017$Forename))]

# 2017 results file; surnames are lower-cased on both sides before matching
results2017 <- fread('./data/candidates/2017/2017 UKPGE electoral data 4.csv', skip = 1, encoding = 'Latin-1')
results2017[,Surname:=tolower(Surname)]
# candidates whose vote count is missing in the candidate subset
to_fix <- candidates2017[is.na(Numberofvotesobtainedbycandidate)]
to_fix[,Surname:=tolower(Surname)]

# look up the missing rows in the results file by constituency name and surname
to_fix <- merge(to_fix,results2017,by.x = c('const_name','Surname'), by.y = c('Constituency','Surname'), all.x = TRUE)
# manually fix non-matching cases
to_fix[name=='Little-Pengelly, Emma', c('ONS Code', 'Valid votes'):=.('N06000003',13299)]
to_fix[name=='McDonnell, Alasdair', c('ONS Code', 'Valid votes'):=.('N06000003',11303)]
to_fix[name=="O'Muilleoir, Mairtin", c('ONS Code', 'Valid votes'):=.('N06000003',7143)]
to_fix[name=="Atwood, Tim", c('ONS Code', 'Valid votes'):=.('N06000004',2860)]
to_fix[name=="Stewart, John", c('ONS Code', 'Valid votes'):=.('N06000005',4524)]
to_fix[name=="Campbell, Noreen", c('ONS Code', 'Valid votes'):=.('N06000007',886)]
to_fix[name=="Elliott, Tom", c('ONS Code', 'Valid votes'):=.('N06000007',24355)]
to_fix[name=="Garrity, Mary", c('ONS Code', 'Valid votes'):=.('N06000007',2587)]
to_fix[name=="Gildernew, Michelle", c('ONS Code', 'Valid votes'):=.('N06000007',25230)]
to_fix[name=="Jones, Tanya", c('ONS Code', 'Valid votes'):=.('N06000007',423)]
to_fix[name=="McCallion, Elisha", c('ONS Code', 'Valid votes'):=.('N06000008',18256)]
to_fix[name=="Nickels, Ian", c('ONS Code', 'Valid votes'):=.('N06000009',462)]
to_fix[name=="Brady, Mickey", c('ONS Code', 'Valid votes'):=.('N06000011',25666)]
to_fix[name=="Coade, Jackie", c('ONS Code', 'Valid votes'):=.('N06000011',1256)]
to_fix[name=="Irwin, William", c('ONS Code', 'Valid votes'):=.('N06000011',13177)]
to_fix[name=="McNulty, Justin", c('ONS Code', 'Valid votes'):=.('N06000011',9055)]
to_fix[name=="Nicholson, Sam", c('ONS Code', 'Valid votes'):=.('N06000011',4425)]
to_fix[name=="McCartney, Therese", c('ONS Code', 'Valid votes'):=.('N06000013',531)]

# keep the rows that already had a vote count and align their column names
# with the results-file names used in `to_fix`
candidates2017 <- candidates2017[!is.na(Numberofvotesobtainedbycandidate)]
setnames(candidates2017, c('Numberofvotesobtainedbycandidate', 'ONSConstID', 'pano'), c('Valid votes', 'ONS Code', 'PANO'))

# drop from `to_fix` every column that candidates2017 does not have, then
# append the recovered rows
rmVars <- setdiff(colnames(to_fix), colnames(candidates2017))
to_fix[, (rmVars):=NULL]
candidates2017 <- rbind(candidates2017, to_fix)
# manually fix 16 cases with inaccurate ONS Code/PANO and 4 cases with inaccurate 'valid votes'
# `to_fix` below lists the candidate rows with no match in the results file
# (Party Identifer is NA after the left merge); it is not referenced again in
# this section and appears to serve as a diagnostic for the patches that follow
to_fix <- merge(candidates2017, results2017, by=c('ONS Code', 'Valid votes'), all.x = TRUE)
to_fix <- to_fix[is.na(`Party Identifer`)]

candidates2017[`ONS Code`=='E14000602' & name=="Langley, John", c('ONS Code', 'PANO'):=.('E14000601', 101)]
candidates2017[`ONS Code`=='E14000641' & name=="Martin, Craig", c('ONS Code', 'PANO'):=.('E14000840',  215)]
candidates2017[`ONS Code`=='E14000656' & name=="Underwood, Peter", c('ONS Code', 'PANO'):=.("E14000655",  171)]
candidates2017[`ONS Code`=='E14000759' & name=="Warrington, Rosemary", c('ONS Code', 'PANO'):=.("E14000760",  329)]
candidates2017[`ONS Code`=='E14000761' & name=="Gummer, Ben", `Valid votes`:=23393]
candidates2017[`ONS Code`=='E14000761' & name=="Martin, Sandy", `Valid votes`:=24224]
candidates2017[`ONS Code`=='E14000781' & name=="Coyle, Laura", c('ONS Code', 'PANO'):=.("E14000742", 305)]
candidates2017[`ONS Code`=='E14000822' & name=="O'Neill, Hannah", `Valid votes`:=28987]
candidates2017[`ONS Code`=='E14000880' & name=="Reed, Graham Richard", c('ONS Code', 'PANO'):=.("E14000879", 452)]
candidates2017[`ONS Code`=='E14000935' & name=="Graves, Alan Wayne", c('ONS Code', 'PANO'):=.("E14000663", 182)]
candidates2017[`ONS Code`=='E14000971' & name=="Robson, Emma", c('ONS Code', 'PANO'):=.("E14000970", 535)]
candidates2017[`ONS Code`=='E14000971' & name=="Strike, Ted", c('ONS Code', 'PANO'):=.("E14000970", 535)]
candidates2017[`ONS Code`=='E14001029' & name=="Clucas, Flo", c('ONS Code', 'PANO'):=.("E14001030", 613)]
candidates2017[`ONS Code`=='S14000002' & name=="Durkin, Richard John", c('ONS Code', 'PANO'):=.("S14000001", 3)]
candidates2017[`ONS Code`=='S14000024' & name=="Briggs, Miles Edward Franc", c('ONS Code', 'PANO'):=.("S14000025", 231)]
candidates2017[`ONS Code`=='S14000025' & name=="Smith, Steph", c('ONS Code', 'PANO'):=.("S14000024", 230)]
candidates2017[`ONS Code`=='S14000030' & name=="Clark, Matthew", `Valid votes`:=576]
candidates2017[`ONS Code`=='S14000035' & name=="Nelson, Isabel", c('ONS Code', 'PANO'):=.("S14000029", 262)]
candidates2017[`ONS Code`=='S14000037' & name=="Jardine, Christine", c('ONS Code', 'PANO'):=.("S14000026", 232)]
candidates2017[`ONS Code`=='S14000043' & name=="Batho, Sandy", c('ONS Code', 'PANO'):=.("S14000026", 232)]

# attach the official results (party identifier) by constituency code and vote count
candidates2017 <- merge(candidates2017, results2017,
                        by = c('ONS Code', 'Valid votes'),
                        all.x = TRUE)
# share in percent of the votes summed over the candidates present in this table
candidates2017[, share := (`Valid votes` / sum(`Valid votes`)) * 100, by = `ONS Code`]
# highest share among white (bme==0) and BME (bme==1) candidates per constituency
# (-Inf, with a warning from max(), when a group has no candidate with a share)
candidates2017[, c('share_strongest_white', 'share_strongest_bme') :=
                 .(max(share[which(bme == 0)], na.rm = TRUE),
                   max(share[which(bme == 1)], na.rm = TRUE)),
               by = .(`ONS Code`)]
candidates2017[, minority_victory_margin := share_strongest_bme - share_strongest_white]

# opposition party = party of the strongest candidate of the trailing group
candidates2017[(minority_victory_margin < 0 & share == share_strongest_bme) |
                 (minority_victory_margin > 0 & share == share_strongest_white),
               party_opposition := `Party Identifer`]
# keep constituencies with a defined (finite) margin and an identified opponent
candidates2017 <- candidates2017[!is.na(party_opposition) & is.finite(minority_victory_margin),
                                 .(ons_id = `ONS Code`, party_opposition)]
candidates2017[, election := 2017]

# stack the opposition parties identified in the 2010, 2015 and 2017 elections
party_opposition_t <- rbind(candidates2010[, .(ons_id, party_opposition, election)],
                            candidates2015,
                            candidates2017)
# harmonise party labels with those used in the t+1 vote-share table
opposition_labels <- c("Scottish National Party" = "SNP",
                       "Green" = "Green Party",
                       "Liberal Democrats" = "Liberal Democrat")
party_opposition_t[party_opposition %in% names(opposition_labels),
                   party_opposition := unname(opposition_labels[party_opposition])]

## Merge vote share t+1 with opposition party t
# election year t -> year of the following general election (t+1)
following_election <- c('2010' = 2015, '2015' = 2017, '2017' = 2019)
party_opposition_t[, next_election_year := unname(following_election[as.character(election)])]
rm(opposition_labels, following_election)

# attach the t+1 vote share of the party that fielded the strongest
# non-coethnic opponent at t (left join: unmatched parties keep share = NA)
party_opposition <- merge(party_opposition_t, vote_share_t_1,
                          by.x = c('ons_id', 'next_election_year', 'party_opposition'),
                          by.y = c('id', 'election', 'party'),
                          all.x = TRUE)
# remove duplicated opposing-party rows for id=='E14000873' & next_election_year==2019:
# of the 'Independent' rows only the one with a share between 0.28 and 0.29 is kept.
# The rows to drop are flagged in a standalone logical vector rather than in a
# helper column named `rm`, which masked base::rm() inside the table and was
# left behind in `party_opposition`. Rows whose test evaluates to NA (e.g.
# missing share) are kept, exactly as before.
dup_independent <- party_opposition[, ons_id=='E14000873' & next_election_year==2019 &
                                      party_opposition=='Independent' & (share<0.28 | share>0.29)]
party_opposition <- party_opposition[!(dup_independent %in% TRUE)]
rm(dup_independent)
setnames(party_opposition, 'share', 'share_non_coethnic_opposition_t1')


## Merge with candidates data
# left join: every row of `candidates` is kept; the opposition party at t and
# its t+1 vote share are added where available
opposition_t1 <- party_opposition[, .(ons_id, election, party_opposition, share_non_coethnic_opposition_t1)]
candidates <- merge(candidates, opposition_t1,
                    by = c('ons_id', 'election'),
                    all.x = TRUE)

# clear the workspace of everything except the analysis table
rm(list = setdiff(ls(), 'candidates'))

#### Compute effective number of parties in election t+1
elections <- read_dta('./data/candidates/2019/UK GE 2010_2019 V1.9 (inc Brexit EU Ref vote and 2011 Census).dta')
setDT(elections)

# Effective number of parties for one election: 1 / sum of squared vote shares.
#   votes      table with `ons_id`, the total-votes column and the party vote columns
#   total_col  name of the column holding total (valid) votes
#   party_cols names of the party vote-count columns (missing counts are ignored)
#   year       election year, stored as `next_election_year`
# Returns a table with ons_id, eff_num_parties and next_election_year.
effective_parties <- function(votes, total_col, party_cols, year) {
  shares <- votes[, c('ons_id', total_col, party_cols), with = FALSE]
  denominator <- shares[[total_col]]
  shares[, (party_cols) := lapply(.SD, "/", denominator), .SDcols = party_cols]
  shares[, eff_num_parties := 1/(rowSums((.SD)^2, na.rm = TRUE)), .SDcols = party_cols]
  shares[, next_election_year := year]
  shares[, .(ons_id, eff_num_parties, next_election_year)]
}

## compute effective # of parties 2019
parties2019 <- c('Convote19', 'Labvote19', 'LDvote19', 'SNPvote19', 'PCvote19', 'Greenvote19', 'BrxVote19','DUPvote19', 'SFvote19',
                 'SDLPvote19', 'UUPvote19', 'Alliancevote19', 'othervote19')
enp2019 <- effective_parties(elections, 'Validvotes19', parties2019, 2019)

## 2017
parties2017 <- c('convote17','labvote17','ldvote17','ukipvote17','greenvote17','snpvote17','pcvote17',
                 'dupvote17','sfvote17','sdlpvote17','uupvote17','alliancevote17','othervote17')
enp2017 <- effective_parties(elections, 'valid_votes17', parties2017, 2017)

## 2015
# the 2015 columns have no "other" category: derive it as total votes minus
# the sum of the listed parties before computing shares
parties2015 <- c('ConVote15', 'LabVote15', 'LDVote15', 'SNPVote15', 'PCVote15', 'UKIPVote15', 'GreenVote15', 'BNPVote15')
vs2015 <- elections[, c('ons_id', 'TotalVote15', parties2015), with = FALSE]
vs2015[, OtherVote15 := TotalVote15-(rowSums(.SD, na.rm = TRUE)), .SDcols = parties2015]
enp2015 <- effective_parties(vs2015, 'TotalVote15', c(parties2015, 'OtherVote15'), 2015)

num_parties <- rbind(enp2015, enp2017, enp2019)

## Merge with candidates data
candidates <- merge(candidates,
                    num_parties,
                    by = c('ons_id', 'next_election_year'),
                    all.x = TRUE)

# clear the workspace (including the helper above) of everything except the analysis table
rm(list = setdiff(ls(), 'candidates'))


#### Merge candidates data with constituency demographics from 2011 England-Wales census and Scotland census
## census data are keyed by `geogcode`, matched to the constituency `ons_id`
candidates <- merge(candidates,
                    fread('./data/clean/census_demographic_data.csv'),
                    by.x = 'ons_id',
                    by.y = 'geogcode',
                    all.x = TRUE)

#### Merge candidates data with indicator variable of whether BES sampled only white respondents in constituency
## the indicator is keyed by constituency and election year
candidates <- merge(candidates,
                    fread('./data/clean/bes_sampling_indicator.csv'),
                    by = c('ons_id', 'election'),
                    all.x = TRUE)

#### Save candidates data for main analysis
fwrite(candidates, './data/clean/data_main_analysis.csv')
rm(list = setdiff(ls(), 'candidates'))
######################################


######################################
# Clean The British Election Study (BES) data used in
# Appendix Tables C.1 and D.2
# **NOTE**: We do not include this 'raw' BES data (next 3 lines of code) in the materials
# We include the below code to clean the 'raw' BES data as well as the 'final' data
# produced by this code in file './data/clean/data_bes_analysis.csv'
#############################################
# bes19 <- data.table(read_dta('./data/bes/bes_rps_2019_1.0.0.dta'))
# bes17 <- data.table(read_dta('./data/bes/bes_f2f_2017_v1.5.dta'))
# bes15 <- data.table(read_dta('./data/bes/bes_f2f_2015_v4.0.dta'))
# 
# ## clean & compute variables of turnout & demographics
# # turnout in GE (self-reported)
# bes19[!(b01 %in% seq(1,2)), b01:=NA]
# bes17[!(b01 %in% seq(1,2)), b01:=NA]
# bes15[!(b01 %in% seq(1,2)), b01:=NA]
# 
# bes19[b01==2, b01:=0]
# bes17[b01==2, b01:=0]
# bes15[b01==2, b01:=0]
# 
# bes19[,turnout_ge:=b01]
# bes17[,turnout_ge:=b01]
# bes15[,turnout_ge:=b01]
# 
# # validated turnout
# # missing for 2019
# bes17[,validated_turnout:=validatedTurnoutBinary]
# bes15[,validated_turnout:=validatedTurnoutBinary]
# 
# # turnout in previous GE (self-reported)
# bes19[u05>-1, past_turnout_ge:=ifelse((u05==1 | u05==11),0,1)]
# bes17[u05>-1, past_turnout_ge:=ifelse((u05==1 | u05==11),0,1)]
# bes15[u05>-1, past_turnout_ge:=ifelse((u05==1 | u05==11),0,1)]
# 
# # vote choice if voted
# bes19[b02>0 & b02<9, vote_choice:=b02]
# bes17[b02>0 & b02<9, vote_choice:=b02]
# bes15[b02>0 & b02<9, vote_choice:=b02]
# 
# # vote choice if would have voted
# bes19[b04>0 & b04<9, vote_preference:=b04]
# bes17[b04>0 & b04<9, vote_preference:=b04]
# bes15[, vote_preference:=b04-1]
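# bes15[vote_preference<1 & vote_preference>8, vote_preference:=NA]
# # NOTE(review): the condition above can never be TRUE (a value cannot be both
# # <1 and >8), so no value is set to NA here; `|` was probably intended.
# # Confirm against the shipped './data/clean/data_bes_analysis.csv' before changing.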
# 
# # white
# bes19[y11>0,white:=ifelse(y11<5,1,0)]
# bes17[y11>0,white:=ifelse(y11<5,1,0)]
# bes15[y11>0,white:=ifelse(y11<5,1,0)]
# # male
# bes19[y09>0 & y09<3,male:=ifelse(y09==1,1,0)]
# bes17[y09>0 & y09<3,male:=ifelse(y09==1,1,0)]
# bes15[y09>0 & y09<3,male:=ifelse(y09==1,1,0)]
# # low income
# bes19[Y01_detailed>0,low_income:=ifelse(Y01_detailed<7 & Y01_detailed>0,1,0)]
# bes17[y01>0,low_income:=ifelse(y01<7 & y01>0,1,0)]
# bes15[y01>0,low_income:=ifelse(y01<7 & y01>0,1,0)]
# # own house
# bes19[y03>0,own_house:=ifelse(y03<3 & y03>0,1,0)]
# bes17[y03>0,own_house:=ifelse(y03<3 & y03>0,1,0)]
# bes15[y03>0,own_house:=ifelse(y03<3 & y03>0,1,0)]
# # unemployed
# bes19[y17>0,unemployed:=ifelse(y17==5,1,0)]
# bes17[y17>0,unemployed:=ifelse(y17==5,1,0)]
# bes15[y17>0,unemployed:=ifelse(y17==5,1,0)]
# # employed
# bes19[y17>0,employed:=ifelse(y17>=0 & y17<5,1,0)]
# bes17[y17>0,employed:=ifelse(y17>=0 & y17<5,1,0)]
# bes15[y17>0,employed:=ifelse(y17>=0 & y17<5,1,0)]
# # single
# bes19[y26>0,single:=ifelse(y26==3,1,0)]
# bes17[y26>0,single:=ifelse(y26==3,1,0)]
# bes15[y26>0,single:=ifelse(y26==3,1,0)]
# # age
# bes19[Age<0, Age:=NA]
# bes19[,Age:=as.numeric(Age)]
# bes17[Age<0, Age:=NA]
# bes17[,Age:=as.numeric(Age)]
# bes15[Age<0, Age:=NA]
# # attitudes towards migrants too many immigrants let into the country (0,1)
# bes19[!(j05 %in% seq(1,2)), j05:=NA]
# bes17[!(j05 %in% seq(1,2)), j05:=NA]
# bes15[!(j05 %in% seq(1,2)), j05:=NA]
# bes19[, j05:=j05-1]
# bes17[, j05:=j05-1]
# bes15[, j05:=j05-1]
# 
# bes19[,election:=2019]
# bes17[,election:=2017]
# bes15[,election:=2015]
# 
# bes19 <- bes19[,.(election, Constit_Code, Constit_Name, Age, single, unemployed,
#                   employed, own_house, low_income, male, white,
#                   j05, turnout_ge, past_turnout_ge, vote_choice, vote_preference)]
# bes17 <- bes17[,.(election, Constit_Code, Constit_Name, Age, single, unemployed,
#                   employed, own_house, low_income, male, white, j05, turnout_ge,
#                   past_turnout_ge, validated_turnout, vote_choice, as.numeric(vote_preference))]
# bes15 <- bes15[,.(election, Constit_Code, Constit_Name, Age, single, unemployed,
#                   employed, own_house, low_income, male, white,
#                   j05, turnout_ge, past_turnout_ge, validated_turnout,
#                   vote_choice, as.numeric(vote_preference))]
# 
# bes <- rbind(bes15,bes17,bes19, fill=TRUE)
# 
# # merge BES data with candidates data
# bes <- merge(candidates, bes,
#              by.x = c('next_election_year', 'ons_id'),
#              by.y = c("election", 'Constit_Code'),
#              all.x = TRUE)
# 
# # define cluster as constituency-election year tuples
# bes[,cluster:=paste(election, ons_id, sep = '_')]
# 
# # define whether respondent voted for party of incumbent and party of strongest opponent
# bes[vote_choice==1, vote_choice_ch:='Labour']
# bes[vote_choice==2, vote_choice_ch:='Conservative']
# bes[vote_choice==3, vote_choice_ch:='Liberal Democrat']
# bes[vote_choice==4, vote_choice_ch:='SNP']
# bes[vote_choice==5, vote_choice_ch:='Plaid Cymru']
# bes[vote_choice==6, vote_choice_ch:='Green Party']
# bes[vote_choice==7, vote_choice_ch:='UKIP']
# bes[vote_choice==8, vote_choice_ch:='BNP']
# bes[vote_choice==9, vote_choice_ch:='Other']
# 
# bes[vote_preference==1, vote_preference_ch:='Labour']
# bes[vote_preference==2, vote_preference_ch:='Conservative']
# bes[vote_preference==3, vote_preference_ch:='Liberal Democrat']
# bes[vote_preference==4, vote_preference_ch:='SNP']
# bes[vote_preference==5, vote_preference_ch:='Plaid Cymru']
# bes[vote_preference==6, vote_preference_ch:='Green Party']
# bes[vote_preference==7, vote_preference_ch:='UKIP']
# bes[vote_preference==8, vote_preference_ch:='BNP']
# bes[vote_preference==9, vote_preference_ch:='Other']
# 
# bes[vote_choice_ch==party_name, vote_incumbent:=1]
# bes[vote_choice_ch!=party_name, vote_incumbent:=0]
# bes[is.na(vote_choice_ch), vote_incumbent:=NA]
# 
# bes[vote_choice_ch==party_opposition, vote_opposition:=1]
# bes[vote_choice_ch!=party_opposition, vote_opposition:=0]
# bes[is.na(vote_choice_ch), vote_opposition:=NA]
# # note that no respondent chooses option 'Other'
# bes[(party_opposition %in% c("Independent", "CPA", "RDP", "WEP", "AGS", "CPP", "SDP")), vote_opposition:=NA]
# 
# ##### Save BES data for analysis
# fwrite(bes, './data/clean/data_bes_analysis.csv')
# 
# rm(list=setdiff(ls(), c('candidates', 'bes')))
# ############################################################
# 
# ### Compute variable of whether constituency is majority-white/majority-minority based on sampling of only white respondents within constituency by British Election Study
# candidates[, cluster_id:=paste(election, ons_id, sep='_')]
# candidates[, cluster_id_bes_white:=ifelse(cluster_id %in% bes[white==1, unique(cluster)],1,0)]
# ## Save indicator variable
# fwrite(candidates[, .(ons_id, election, cluster_id_bes_white)], file='./data/clean/bes_sampling_indicator.csv')
############################################################



############################################################
# Clean 2005 candidates and election results data used in
# Appendix Table B.6
############################################################
## clean candidates data and compute running variable
results <- read_xls('./data/candidates/2005/The British Parliamentary Constituency Database 1992-2005 Release 1.3.xls', sheet=2)
setDT(results)

# one constant party-label column per party, so that melt() can stack the
# labels alongside the per-party candidate columns
party_labels_2005 <- list(ConPParty = 'Conservative',
                          LabPParty = 'Labour',
                          LDPParty = 'Liberal Democrats',
                          snpPParty = 'SNP',
                          pcPParty = 'Plaid Cymru',
                          grnPParty = 'Green Party',
                          bnpPParty = 'BNP',
                          ukipPParty = 'UKIP',
                          othPParty = 'Other')
results[, (names(party_labels_2005)) := party_labels_2005]

# old column name -> new column name; after renaming, the majority and total
# vote columns no longer contain "vt05", the pattern melt() uses for party votes
vote_col_renames <- c(convt05 = 'Convt05',
                      labvt05 = 'Labvt05',
                      ldvt05 = 'LDvt05',
                      LDSex05 = 'LDsex05',
                      majvt05 = 'majvote05',
                      totvt05 = 'totvote05')
setnames(results, old = names(vote_col_renames), new = unname(vote_col_renames))

# long format: one row per constituency-party pair
candidates2005 <- melt(results,
                       id.vars = c("PA05", "Seat05", "County", "Region", 'totvote05'),
                       measure.vars = patterns('PParty', "ppc05", "vt05", "race05", "sex05", "Inc05"),
                       value.name = c('party', 'name', "votes", "race", 'sex', 'incumbent'))
candidates2005[, share := 100*(votes/totvote05), by = .(PA05, Seat05)]

# highest share among white (race==0) and BME (race==1) candidates in each
# constituency, using only candidates with recorded votes
# ignore warning message from max function: the result is -Inf for a constituency without a candidate of that group
candidates2005[!is.na(votes),
               c('share_strongest_white', 'share_strongest_bme') :=
                 .(max(share[which(race == 0)], na.rm = TRUE),
                   max(share[which(race == 1)], na.rm = TRUE)),
               by = .(PA05, Seat05)]
# note that minority_victory_margin is undefined for constituencies with all BME candidates, or all white candidates
candidates2005[!is.na(votes), minority_victory_margin := share_strongest_bme - share_strongest_white]
candidates2005[, female := ifelse(sex==1,1,0)]
# if victory margin > 0, keep bme candidate info. otherwise keep white candidate info
candidates2005[(minority_victory_margin < 0 & share == share_strongest_white) |
                 (minority_victory_margin > 0 & share == share_strongest_bme),
               c('party_name', 'candidate_name', 'candidate_female', 'candidate_incumbent') :=
                 .(party, name, female, incumbent)]

# subset to strongest white/bme candidates in constituencies with defined minority victory margin & relevant columns
candidates2005 <- candidates2005[!is.na(party_name) & is.finite(minority_victory_margin),
                                 .(PA05, Seat05, County, Region,
                                   party_name, candidate_name,
                                   candidate_female, candidate_incumbent,
                                   share, minority_victory_margin)]

rm(results, party_labels_2005, vote_col_renames)

## Note that `turnout_2010_dasymetric_interpolation.csv` is computed by:
# source('./code/interpolation 2010 2005/vote_interpolation_from_2010_to_2005.R')
# which loads interpolation function from source('./code/interpolation_programme.R') Goplerud, Max (2016) Political Analysis. 24(1): 121-129.
turnout2010 <- fread(file = './data/interpolation 2010 2005/turnout_2010_dasymetric_interpolation.csv')

# manually fix seat names
candidates2005$Seat05 <- gsub('&', 'and', candidates2005$Seat05)
candidates2005[Seat05=='Worcestershire Mid', Seat05:='Mid Worcestershire']
candidates2005[Seat05=='Worcestershire West', Seat05:='West Worcestershire']
candidates2005[Seat05=='Aberdeenshire West and Kincardine', Seat05:='West Aberdeenshire and Kincardine']
candidates2005[Seat05=='Aldridge - Brownhills', Seat05:='Aldridge-Brownhills']
candidates2005[Seat05=='Ayrshire Central', Seat05:='Central Ayrshire']
candidates2005[Seat05=='Ayrshire North and Arran', Seat05:='North Ayrshire and Arran']
candidates2005[Seat05=='Bedfordshire Mid', Seat05:='Mid Bedfordshire']
candidates2005[Seat05=='Bedfordshire North East', Seat05:='North East Bedfordshire']
candidates2005[Seat05=='Bedfordshire South West', Seat05:='South West Bedfordshire']
candidates2005[Seat05=='Bury St Edmunds', Seat05:='Bury St. Edmunds']
candidates2005[Seat05=='Cambridgeshire North East', Seat05:='North East Cambridgeshire']
candidates2005[Seat05=='Cambridgeshire North West', Seat05:='North West Cambridgeshire']
candidates2005[Seat05=='Cambridgeshire South', Seat05:='South Cambridgeshire']
candidates2005[Seat05=='Cambridgeshire South East', Seat05:='South East Cambridgeshire']
candidates2005[Seat05=='Chelmsford West', Seat05:='West Chelmsford']
candidates2005[Seat05=='Chester, City of', Seat05:='City of Chester']
candidates2005[Seat05=='Cornwall North', Seat05:='North Cornwall']
candidates2005[Seat05=='Cornwall South East', Seat05:='South East Cornwall']
candidates2005[Seat05=='Cumbernauld, Kilsyth and Kirkintilloch', Seat05:='Cumbernauld, Kilsyth and Kirkintilloch East']
candidates2005[Seat05=='Derbyshire North East', Seat05:='North East Derbyshire']
candidates2005[Seat05=='Derbyshire South', Seat05:='South Derbyshire']
candidates2005[Seat05=='Derbyshire West', Seat05:='West Derbyshire']
candidates2005[Seat05=='Devon East', Seat05:='East Devon']
candidates2005[Seat05=='Devon North', Seat05:='North Devon']
candidates2005[Seat05=='Devon South West', Seat05:='South West Devon']
candidates2005[Seat05=='Devon West and Torridge', Seat05:='Torridge and West Devon']
candidates2005[Seat05=='Dorset  Mid and North Poole', Seat05:='Mid Dorset and North Poole']
candidates2005[Seat05=='Dorset North', Seat05:='North Dorset']
candidates2005[Seat05=='Dorset South', Seat05:='South Dorset']
candidates2005[Seat05=='Dorset West', Seat05:='West Dorset']
candidates2005[Seat05=='Dunbartonshire East', Seat05:='East Dunbartonshire']
candidates2005[Seat05=='Dunbartonshire West', Seat05:='West Dunbartonshire']
candidates2005[Seat05=='Durham North', Seat05:='North Durham']
candidates2005[Seat05=='Durham North West', Seat05:='North West Durham']
candidates2005[Seat05=='Durham, City of', Seat05:='City of Durham']
candidates2005[Seat05=='Ealing Southall', Seat05:='Ealing, Southall']
candidates2005[Seat05=='East Kilbride, Strathaven and Lesmahago', Seat05:='East Kilbride, Strathaven and Lesmahagow']
candidates2005[Seat05=='Edinburth West', Seat05:='Edinburgh West']
candidates2005[Seat05=='Essex North', Seat05:='North Essex']
candidates2005[Seat05=='Fife North East', Seat05:='North East Fife']
candidates2005[Seat05=='Hampshire East', Seat05:='East Hampshire']
candidates2005[Seat05=='Hampshire North East', Seat05:='North East Hampshire']
candidates2005[Seat05=='Hampshire North West', Seat05:='North West Hampshire']
# Lookup of 2005 seat spellings to recode: each name is the spelling currently
# stored in `Seat05`, each value is the spelling it is replaced with.
# Presumably the replacement spellings are the ones used by the tables that
# `Seat05` is merged against later in the script -- confirm against those files.
seat_renames <- c(
  'Hertfordshire North East' = 'North East Hertfordshire',
  'Hertfordshire South West' = 'South West Hertfordshire',
  'Holborn and St Pancras' = 'Holborn and St. Pancras',
  'Hull East' = 'Kingston upon Hull East',
  'Hull North' = 'Kingston upon Hull North',
  'Hull West and Hessle' = 'Kingston upon Hull West and Hessle',
  'Kirkaldy and Cowdenbeath' = 'Kirkcaldy and Cowdenbeath',
  'Lanarck and Hamilton East' = 'Lanark and Hamilton East',
  'Lancashire West' = 'West Lancashire',
  'Leicestershire North West' = 'North West Leicestershire',
  'Lewisham Deptford' = 'Lewisham, Deptford',
  'Livingstone' = 'Livingston',
  'Manchester, Central' = 'Manchester Central',
  'Milton Keynes North East' = 'North East Milton Keynes',
  'Na h-Eileanan an lar' = 'Na h-Eileanan an Iar',
  'Norfolk Mid' = 'Mid Norfolk',
  'Norfolk North' = 'North Norfolk',
  'Norfolk North West' = 'North West Norfolk',
  'Norfolk South' = 'South Norfolk',
  'Norfolk South West' = 'South West Norfolk',
  "Regent's Park and Kensington North" = "Regent's Park and North Kensington",
  'Renfrewshire East' = 'East Renfrewshire',
  'Ribble South' = 'South Ribble',
  'Ruislip - Northwood' = 'Ruislip-Northwood',
  'St Albans' = 'St. Albans',
  'St Helens North' = 'St. Helens North',
  'St Helens South' = 'St. Helens South',
  'St Ives' = 'St. Ives',
  'Sheffield, Central' = 'Sheffield Central',
  'Shropshire North' = 'North Shropshire',
  'South Holland and The Deepings' = 'South Holland and the Deepings',
  # the source spelling below contains two consecutive spaces
  'Southwark  North and Bermondsey' = 'North Southwark and Bermondsey',
  'Staffordshire South' = 'South Staffordshire',
  'Stratford on Avon' = 'Stratford-on-Avon',
  'Suffolk Central and Ipswich North' = 'Central Suffolk and North Ipswich',
  'Suffolk South' = 'South Suffolk',
  'Suffolk West' = 'West Suffolk',
  'Surrey East' = 'East Surrey',
  'Surrey South West' = 'South West Surrey',
  'Sussex Mid' = 'Mid Sussex',
  'Swindon North' = 'North Swindon',
  'Swindon South' = 'South Swindon',
  'Thanet North' = 'North Thanet',
  'Thanet South' = 'South Thanet',
  'Truro and St Austell' = 'Truro and St. Austell',
  'Tyneside North' = 'North Tyneside',
  'Warwickshire North' = 'North Warwickshire',
  'Wiltshire North' = 'North Wiltshire',
  'Worthing East and Shoreham' = 'East Worthing and Shoreham',
  'Wrekin, The' = 'The Wrekin',
  'Ynys-Mon' = 'Ynys Mon',
  'York, City of' = 'City of York',
  'Yorkshire East' = 'East Yorkshire',
  'Linlithgow and East Falkir' = 'Linlithgow and East Falkirk'
)

# Apply the recodes one at a time, in the order listed, updating `Seat05`
# by reference for the rows that carry the old spelling.
for (seat_from in names(seat_renames)) {
  candidates2005[Seat05 == seat_from, Seat05 := seat_renames[[seat_from]]]
}

# Attach the estimated turnout figures to every candidate row. The join is a
# left join (all.x = TRUE), so candidates whose seat name has no counterpart
# in `turnout2010$PCON05NM` are kept.
candidates2005 <- merge(
  x = candidates2005,
  y = turnout2010,
  by.x = 'Seat05',
  by.y = 'PCON05NM',
  all.x = TRUE
)

# Use the singular 'Liberal Democrat' label before building party indicators.
candidates2005[party_name == 'Liberal Democrats', party_name := 'Liberal Democrat']

# Append the party indicator columns produced by dummy().
# NOTE(review): the `candidates2005_` column prefix that is renamed further
# down appears to be derived by dummy() from the calling expression, so this
# call is deliberately left nested inside cbind() exactly as written --
# confirm against the dummies 1.5.6 source before refactoring.
candidates2005 <- cbind(candidates2005, dummy(candidates2005$party_name, sep = '_', drop = FALSE))

# Derived quantities, added in a single pass:
#   turnout / electorate  - turnout rate and rounded electorate
#   VoteShare_*           - party votes as a fraction of di.turnout
#   election / next_...   - election-year markers
candidates2005[, `:=`(
  turnout = di.turnout / di.electorate,
  electorate = round(di.electorate),
  VoteShare_CON = di.con_vote / di.turnout,
  VoteShare_LAB = di.lab_vote / di.turnout,
  VoteShare_LIBDEM = di.lib_vote / di.turnout,
  election = 2005,
  next_election_year = 2010
)]

# Rename the seat, incumbency and area-code columns.
setnames(
  candidates2005,
  old = c('Seat05', 'candidate_incumbent', 'PA05'),
  new = c('constituency_name', 'incumbent', 'ons_id')
)

## Attach demographic variables from the 2001 Census (1995 constituency boundaries)
census2001 <- fread('./data/clean/census_2001_demographic_data.csv')

# Left join on constituency name: candidate rows without census data are kept.
candidates2005 <- merge(
  x = candidates2005,
  y = census2001,
  by = 'constituency_name',
  all.x = TRUE
)

# Rename the three main-party indicator columns so they can be appended to
# the main data (current name = new name).
party_indicator_names <- c(
  'candidates2005_Conservative' = 'candidates_Conservative',
  'candidates2005_Labour' = 'candidates_Labour',
  'candidates2005_Liberal Democrat' = 'candidates_Liberal Democrat'
)
setnames(
  candidates2005,
  old = names(party_indicator_names),
  new = unname(party_indicator_names)
)

# Save the cleaned 2005 analysis data.
fwrite(x = candidates2005, file = './data/clean/data_2005_analysis.csv')
#################### END ####################


